set.seed(1782318297)

pacman::p_load(tidyverse, janitor, haven, stargazer, mice, mediation, MatchIt)

##Imputation

lapop <- read_dta("2018 LAPOP AmericasBarometer Merge_v1.0_W.dta")

lapopla <- lapop[complete.cases(lapop$ing4), ]
lapopla <- lapopla[complete.cases(lapopla$pn4), ]

lapopla <- lapopla %>%
  filter(!pais %in% c(40, 41)) %>%
  filter(wave == 2018) %>% 
  mutate(sup = ing4,
         sat = 5-pn4,
         vic = 2 - vic1ext,
         race = ifelse(etid == 1, 1, 0),
         urban = 2 - ur,
         sexo = sex - 1,
         income = r1 + r3 + r4 + r4a + r5 + r6 + r7 + r8 + r12 + r14 +
           r15,
         coup = case_when(jc10 == 1 ~ 1,
                          jc10 == 2 ~ 0,
                          jc13 == 1 ~ 1,
                          jc13 == 2 ~ 0),
         wave = as.factor(wave),
         pais = as.factor(pais))

lapopla <- lapopla %>%
  dplyr::select(sup, sat, aoj11, vic, race, urban, b13, b18, b21, b21a, b31, l1, coup,
                sexo, income, wave, pais, ed, q2, wt, wave
  )

lapopla <- lapopla %>% 
  rename(age = q2,
         female = sexo,
         education = ed,
         country = pais)

in_lapopla <- sapply(lapopla, haven::zap_labels)

in_lapopla <- mice(in_lapopla, m = 5, maxit = 10)

in_lapopla <- complete(in_lapopla, action = 5)

#Matching

##Nearest Neighbor

out2 <- matchit(vic ~ age + female + education + income + race + urban, exact = "country", 
                data = in_lapopla, sweights = ~ wt,
                method = "nearest", distance = "logit")


##Full matching

pacman::p_load(optmatch)

out3 <- matchit(vic ~ age + female + education + income + race + urban, exact = "country", 
                data = in_lapopla, sweights = ~ wt,
                method = "full")



##CEM

pacman::p_load(cem)

out4 <- matchit(vic ~ age + female + education + income + race + urban + country,
                data = in_lapopla, sweights = ~ wt,
                method = "cem")


##Covariate Balance (Figure 1)

pacman::p_load(cobalt)

loveplot_nn <- love.plot(out2, stats = c("m"), binary = "std", continuous = "std",
                         abs = TRUE, thresholds = 0.1, drop.distance = T,
                         sample.names = c("Unmatched", "Matched"),
                         colors = c("black", "gray"))

loveplot_nn <- loveplot_nn + theme(axis.title.x = element_blank(),
                                   plot.title = element_text(family = "Cambria", size = 11),
                                   text=element_text(size=11,  family="Cambria"))

loveplot_full <- love.plot(out3, stats = c("m"), binary = "std", continuous = "std",
                           abs = TRUE, thresholds = 0.1, drop.distance = T, title = "Full Matching",
                           colors = c("black", "gray"))

loveplot_full <- loveplot_full + theme(axis.title.x = element_blank(),
                                       plot.title = element_text(family = "Cambria", size = 11),
                                       text=element_text(size=11,  family="Cambria"))

loveplot_cem <- love.plot(out4, stats = c("m"), binary = "std", continuous = "std",
                          abs = TRUE, thresholds = 0.1,
                          colors = c("black", "gray"))

loveplot_cem <- loveplot_cem + labs(title = "CEM", text=element_text(size=11,  family="Cambria")) + 
  theme(axis.title.x = element_blank(),
        plot.title = element_text(family = "Cambria", size = 11),
        text=element_text(size=11,  family="Cambria"))

pacman::p_load(ggpubr)

ggarrange(loveplot_nn, loveplot_full, loveplot_cem, ncol = 1, nrow = 3,
          common.legend = T, align = "hv")

##Extracting data sets

nn_data <- match.data(out2)

full_data <- match.data(out3)

cem_data <- match.data(out4)

##Descriptive statistics (Table 1)

descriptive <- in_lapopla %>%  
  dplyr::select(sup, sat, vic, race, urban, female,
                             income, education, age)

summary(descriptive)
